##########################################################
## This code is to merge the AOD data into              ##
## the power plant dataset.                             ##
##########################################################
rm(list=ls())
options(scipen=100,digits=10)

## Note: rgdal and gdalUtils packages are only available in the prevous R versions (R 4.2.0 and earlier)

library(rgdal)
library(gdalUtils)
library(raster)
library(dplyr)
library(sf)
library(tidyverse)
library(rgeos)
library(foreign)

# power plants coordinates, downloaded from EIA (https://www.eia.gov/maps/layer_info-m.php). 
setwd("data/raw data/PowerPlants_US_EIA_April2019") 
data_initial=read.dbf("PowerPlants_US_201904.dbf")
data=data_initial[c(1,30,31)]


# Transfer data to spatial
coordinates(data)=c("Longitude","Latitude")
# Set Coordinates Referrence System as long-lat
proj4string(data)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
# Projection: using Albers Equal Area Conic projection system  
# https://zia207.github.io/geospatial-data-science.github.io/map-projection-coordinate-reference-systems.html#albers-equal-area-conic
centroids_UTM <-spTransform(data, CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 
                                  +datum=WGS84 +units=m +no_defs "))
centroids_UTM <- st_as_sf(centroids_UTM) 

## Constract Polygon as 3km radius circle
# Transfer Spatial Point to Simple Feature to set the circle buffer
dat_sf <- st_as_sf(centroids_UTM, coords = c("X", "Y"))
# Set the circle buffer, generate Simple Feature Polygon for circles
dat_circles <- st_buffer(dat_sf, dist = 3000)
# Transfer Simple Feature back to Spatial Polygon
centroids_UTM_circle=as(dat_circles, 'Spatial')
# Convert Coordinates Referrence System From UTM back to log-lat
Circles<- spTransform(centroids_UTM_circle, CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))


# Set working directory for Satillite data
setwd("/Volumes/HL-WD/shutdown/1_AOD_data/12_AOD_processed")
# Get the vector of folder names. Each folder contains one day's granules datafiles
out.file=list.files ()
for(i in 1:length(out.file)){
  setwd("data/processed data/AOD_processed")
  data1=data_initial
  data_AOD= read.csv(paste(out.file[i]))
  names(data_AOD)[1]="ID"
  time=unique(data_AOD$Time)
  centroids_AOD=data_AOD[c(2,3,4)]
  coordinates(centroids_AOD)=c("long","lat")
  
  proj4string(centroids_AOD)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
  centroids_UTM_AOD<- spTransform(centroids_AOD, CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 
                                  +datum=WGS84 +units=m +no_defs "))
  centroid_UTM_AOD_coord=as.data.frame(coordinates(centroids_UTM_AOD))
  
  # Set square coordinates for AOD pixels
  square=cbind(centroid_UTM_AOD_coord$long-1500,centroid_UTM_AOD_coord$lat+1500, #NW corner
               centroid_UTM_AOD_coord$long+1500,centroid_UTM_AOD_coord$lat+1500, #NE corner
               centroid_UTM_AOD_coord$long+1500,centroid_UTM_AOD_coord$lat-1500, #SE corner
               centroid_UTM_AOD_coord$long-1500,centroid_UTM_AOD_coord$lat-1500, #SW corner
               centroid_UTM_AOD_coord$long-1500,centroid_UTM_AOD_coord$lat+1500#NW corner again to close polygon
               )
  ID=data_AOD$ID
  # Create Polygon
  polys <- SpatialPolygons(mapply(function(poly, id) 
  {
    xy <- matrix(poly, ncol=2, byrow=TRUE)
    Polygons(list(Polygon(xy)), ID=id)
  }, 
  split(square, row(square)), ID),
  proj4string=CRS(as.character("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 
                                  +datum=WGS84 +units=m +no_defs ")))
  # Add AOD data to polygon to create spatial polygon dataframe
  AOD_poly=SpatialPolygonsDataFrame(polys,as.data.frame(data_AOD$AOD))
  # Transder AOD_poly back to simple feature, this increase the calculation speed
  AOD_st=st_as_sf(AOD_poly)
  # Find the intersection part of well circles and AOD cells
  gg = st_intersection(AOD_st,dat_circles)
  # Calculate the overlapping area size for weighted average
  if(lengths(gg)[1]==0) {
    data1$AOD=NA
    data1$AOD_DATE=as.Date(time)
    data2=data1
  } else {
    area=gArea(as(gg,'Spatial'),byid = TRUE)
    # Construct a dataframe with well id and overlapping aod cells
    aa=as.data.frame(gg)
    aa$AOD_ID=rownames(aa)
    aa$area=area
    
    # calculate the aggregate well circle area covered by AOD cells
    aa1=aggregate(aa[, 5], list(aa$Plant_Code), sum)
    names(aa1)[1]="Plant_Code"
    # (not necessary but just in case) find the well with no AOD coverage
    not_match=subset(data1, !(data1$Plant_Code%in%aa1$Plant_Code))
    # merge aggregate area coverage to the dataframe, in order to calculate the weight for each AOD
    aa2=merge(aa,aa1, by="Plant_Code",all.x=TRUE)
    names(aa2)[2]="AOD"
    # Calculate the weights
    aa2$weights=aa2$area/aa2$x
    # Calculate the weighted AOD for each AOD cell
    aa2$weighted_AOD=aa2$AOD*aa2$weights
    # Calculate the weighted AOD for each well
    Weighted_AOD=aggregate(aa2[, 8], list(aa2$Plant_Code), sum)
    names(Weighted_AOD)[1]="Plant_Code"
    # Merge weighted AOD to the original well dataset
    data2=merge(data1,Weighted_AOD, by="Plant_Code",all.x=TRUE)
    names(data2)[ncol(data2)]="AOD"
    # Set time variable
    data2$AOD_DATE=as.Date(time)
  }
  # Store the processed daily AOD-plant merged datafile (there are too many data files, did not included in the replication package)
  setwd("data/processed data/AOD_Plants")
  write.csv(data2,paste(time,".csv"))
  
}


#############################################################################################
# Run the following code separately from the previous codes
# Combine daily data into panel
#############################################################################################
setwd("data/processed data/AOD_Plants")
out.file=list.files ()
data2=read.csv(out.file[1])
# Initial empty dataframe to store all daily data
all_data=data.frame(matrix(ncol = ncol(data2), nrow = 0))
names(all_data)=names(data2)
for(n in 1:length(out.file)){
  # 
  daily_data=read.csv(out.file[n])
  daily_data=subset(daily_data,!is.na(AOD))
  all_data=rbind(all_data,daily_data)
}
setwd("data/processed data/")
write.csv(all_data,"AOD_all.csv")

